/*************************************************************************
 * The contents of this file are subject to the MYRICOM MYRINET          *
 * EXPRESS (MX) NETWORKING SOFTWARE AND DOCUMENTATION LICENSE (the       *
 * "License"); User may not use this file except in compliance with the  *
 * License.  The full text of the License can found in LICENSE.TXT       *
 *                                                                       *
 * Software distributed under the License is distributed on an "AS IS"   *
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See  *
 * the License for the specific language governing rights and            *
 * limitations under the License.                                        *
 *                                                                       *
 * Copyright 2003 - 2004 by Myricom, Inc.  All rights reserved.          *
 *************************************************************************/

/* Source revision identifier for this file (an expanded $Id$ keyword). */
static const char __idstring[] = "@(#)$Id: mx_peer.c,v 1.40.2.5 2006/11/14 21:24:09 loic Exp $";

#include "mx_arch.h"
#include "mx_misc.h"
#include "mx_instance.h"
#include "mx_malloc.h"
#include "mx_peer.h"
#include "mx_pio.h"

/* Number of bins in mx_peer_hash.  mx_init_peers() sets it to
   mx_max_hash_nodes * 4; mx_peer_hash_fn() masks with (size - 1). */
int mx_peer_hash_size;
/* Highest peer-table index handed out so far; raised by mx_add_peer(). */
int mx_biggest_peer;
/* Incremented on every insertion into mx_peer_overflow. */
int mx_peer_overflow_count;
/* Number of slots in mx_peer_overflow: mx_max_nodes * 2, but at
   least 4096 (see mx_init_peers()). */
int mx_max_hash_nodes;
/* Peer table, mx_max_nodes entries, indexed by peer index. */
mx_peer_t *mx_peer_table;
/* Main MAC-address hash, mx_peer_hash_size bins. */
mx_peer_hash_t *mx_peer_hash;
/* Overflow table for MACs whose main bin is already taken; kept
   sorted by MAC so that it can be binary-searched. */
mx_peer_hash_t *mx_peer_overflow;
/* Lock taken around peer insertion, removal and type/name updates. */
mx_sync_t mx_peer_sync;
/* Host name used by mx_set_hostname() when no explicit name is given. */
char mx_default_hostname[MX_MAX_STR_LEN];


/*
 * Allocate the peer table, the overflow table and the main MAC hash
 * (all zero-filled) and initialise mx_peer_sync.
 *
 * Sizes are derived from mx_max_nodes: the overflow table gets
 * max(2 * mx_max_nodes, 4096) slots and the main hash four times that.
 *
 * Returns 0 on success, ENOMEM if any allocation fails.  On failure
 * every partial allocation is released and its global pointer is reset
 * to NULL, so that a subsequent mx_destroy_peers() cannot free the
 * same memory a second time.
 */
int
mx_init_peers(void)
{
  mx_max_hash_nodes = mx_max_nodes * 2;
  if (mx_max_hash_nodes < 4096)
    mx_max_hash_nodes = 4096;
  /* NOTE(review): mx_peer_hash_fn() masks with (mx_peer_hash_size - 1),
     which only reaches every bin when this size is a power of two --
     confirm that mx_max_nodes is constrained accordingly. */
  mx_peer_hash_size = mx_max_hash_nodes * 4;

  mx_peer_table = mx_kmalloc(sizeof(mx_peer_table[0]) * mx_max_nodes,
                             MX_MZERO|MX_WAITOK);
  if (!mx_peer_table)
    goto abort_with_nothing;

  mx_peer_overflow = mx_kmalloc(sizeof(mx_peer_overflow[0]) * mx_max_hash_nodes,
                                MX_MZERO|MX_WAITOK);
  if (!mx_peer_overflow)
    goto abort_with_mx_peer_table;

  mx_peer_hash = mx_kmalloc(sizeof(mx_peer_hash[0]) * mx_peer_hash_size,
                            MX_MZERO|MX_WAITOK);
  if (!mx_peer_hash)
    goto abort_mx_peer_overflow;

  mx_sync_init(&mx_peer_sync, 0, 0, "peer sync");

  return 0;

 abort_mx_peer_overflow:
  mx_kfree(mx_peer_overflow);
  mx_peer_overflow = NULL;   /* do not leave a dangling global behind */

 abort_with_mx_peer_table:
  mx_kfree(mx_peer_table);
  mx_peer_table = NULL;

 abort_with_nothing:
  return ENOMEM;
}

/*
 * Release everything mx_init_peers() set up.
 *
 * Each table is freed only if it is currently allocated, and its
 * global pointer is cleared afterwards so that calling this function
 * again (or after a failed mx_init_peers()) is harmless.  The lock is
 * destroyed together with the main hash because mx_init_peers() only
 * initialises it once that last allocation has succeeded.
 */
void
mx_destroy_peers(void)
{
  if (mx_peer_overflow) {
    mx_kfree(mx_peer_overflow);
    mx_peer_overflow = NULL;
  }
  if (mx_peer_table) {
    mx_kfree(mx_peer_table);
    mx_peer_table = NULL;
  }
  if (mx_peer_hash) {
    mx_kfree(mx_peer_hash);
    mx_peer_hash = NULL;
    mx_sync_destroy(&mx_peer_sync);
  }
}

/*
 * Fold two 16-bit values into a bin number for mx_peer_hash: XOR them
 * and mask the result with (mx_peer_hash_size - 1).
 */
static inline int
mx_peer_hash_fn(uint16_t a, uint16_t b)
{
  int mask = mx_peer_hash_size - 1;

  return (a ^ b) & mask;
}

/*
 * Find the hash entry that carries the given MAC address.
 *
 * The main hash bin is selected from the low 32 bits of the MAC.  If
 * that bin holds a different address, the sorted overflow table is
 * binary-searched instead.
 *
 * Returns the matching entry (in either table) or NULL when the
 * address is unknown.  Note that a returned entry may still have
 * index == MX_PEER_INVALID; callers filter that themselves.
 *
 * *freebin is always written:
 *   - the empty main bin, when NULL is returned and the main bin is
 *     free for this address;
 *   - NULL in every other case (match found, or the main bin is taken
 *     by another address so a new entry has to go to the overflow
 *     table).
 */
static mx_peer_hash_t *
mx_peer_lookup_bin(mx_peer_hash_t **freebin, uint16_t mac_high16, uint32_t mac_low32)
{
  int i, high, low;
  uint64_t key, tmp;
  unsigned index;

  /* never hand an indeterminate pointer back to the caller */
  *freebin = NULL;

  /* lookup the mac address in our peer hash table */
  index = mx_peer_hash_fn((uint16_t)(mac_low32 >> 16), (uint16_t)mac_low32);
  /* deliberately poor hash, handy for exercising the collision table:
    index = mx_peer_hash_fn((uint16_t)(mac_low32 >> 16), mac_high16);
   */
  if (mx_peer_hash[index].mac_low32 == mac_low32 &&
      mx_peer_hash[index].mac_high16 == mac_high16) {
    return mx_peer_hash + index; /* match! */
  }

  /* if the address doesn't match, check to see if there
     is a collision, or if the address hasn't been seen */

  if (mx_peer_hash[index].mac_low32 == 0 &&
      mx_peer_hash[index].mac_high16 == 0) {
    *freebin = mx_peer_hash + index;
    return NULL;
  }

  /* A collision occurred, so the address can only be in the overflow
     table.  That table is kept sorted, so binary-search the
     mx_peer_overflow_count live entries.  Searching [0, count - 1]
     with a pre-tested loop avoids touching the slot past the last
     entry, which lies outside the array once the table is full. */

  low = 0;
  high = mx_peer_overflow_count - 1;
  key = ((uint64_t)mac_high16 << 32) | mac_low32;
  while (low <= high) {
    i = low + (high - low) / 2;
    tmp = ((uint64_t)mx_peer_overflow[i].mac_high16 << 32)
      | mx_peer_overflow[i].mac_low32;
    if (key == tmp)
      return mx_peer_overflow + i;
    /* an all-zero slot is treated as "past the end" */
    if (key < tmp || tmp == 0)
      high = i - 1;
    else
      low = i + 1;
  }

  /* we get here if there was a collision, but the address
     isn't already in the overflow table */
  return NULL;
}

/*
 * Look up the hash entry of a fully registered peer.
 *
 * Returns the entry found by mx_peer_lookup_bin(), or NULL when the
 * address is unknown or when its entry has no peer-table slot yet
 * (index == MX_PEER_INVALID).
 */
mx_peer_hash_t *
mx_peer_lookup(uint16_t mac_high16, uint32_t mac_low32)
{
  mx_peer_hash_t *unused_bin;
  mx_peer_hash_t *entry;

  entry = mx_peer_lookup_bin(&unused_bin, mac_high16, mac_low32);
  if (entry == NULL)
    return NULL;

  return (entry->index == MX_PEER_INVALID) ? NULL : entry;
}

int
mx_peer_remove(uint16_t mac_high16, uint32_t mac_low32)
{
  int peer_idx, status = 0;
  mx_peer_hash_t *hash;

  mx_mutex_enter(&mx_peer_sync);
  hash = mx_peer_lookup(mac_high16, mac_low32);
  if (hash) {
    /* found in main hash table */
    peer_idx = hash->index;
    bzero(hash, sizeof (*hash));
    bzero(&mx_peer_table[peer_idx], sizeof (mx_peer_table[peer_idx]));
  } else {
    status = ESRCH;
  }

  mx_mutex_exit(&mx_peer_sync);
  return status;
}


/*
 * Find a peer by node name.
 *
 * peer->node_name is the name to search for.  Peer-table entries
 * 0..mx_biggest_peer are scanned in order and the first one whose
 * node_name compares equal (terminator included) is copied into *peer.
 *
 * Returns 0 when a match was copied, ENOENT otherwise.
 * The scan takes no lock.
 */
int
mx_peer_from_hostname(mx_peer_t *peer)
{
  size_t cmp_len;
  mx_peer_t *candidate;
  int idx;

  /* compare through the terminating NUL of the requested name */
  cmp_len = strlen(peer->node_name) + 1;

  for (idx = 0; idx <= mx_biggest_peer; idx++) {
    candidate = &mx_peer_table[idx];
    if (strncmp(peer->node_name, candidate->node_name, cmp_len) != 0)
      continue;
    bcopy(candidate, peer, sizeof(*peer));
    return 0;
  }
  return ENOENT;
}

/*
 * Decide whether a hostname query should be sent for peer_index on
 * board `is`.
 *
 * Returns non-zero only when all of the following hold:
 *   - the peer has no name yet,
 *   - the peer is an MX host,
 *   - the first or the last route set of the board has a non-zero
 *     offset for this peer (i.e. the peer is reachable).
 */
static int
mx_peer_query_needed(mx_instance_state_t *is, int peer_index)
{
  mx_peer_t *entry = mx_peer_table + peer_index;
  int last_port = is->num_ports - 1;   /* last set of routes */

  if (entry->node_name[0] != '\0')
    return 0;                          /* already named */
  if (entry->type != MX_MX_HOST_TYPE)
    return 0;                          /* not an MX node */
  if (is->routes[0].offsets[peer_index] != 0)
    return 1;                          /* reachable via first route set */

  return is->routes[last_port].offsets[peer_index] != 0;
}

/*
 * Send a hostname query for the next peer that needs one.
 *
 * Starting at `peer` (or at 0 if that is beyond mx_biggest_peer) the
 * peer table is walked circularly until mx_peer_query_needed()
 * accepts an entry.  A query request for that entry is then written
 * to the board and is->query_pending is set.  Nothing is sent when a
 * query is already pending or when a full lap finds no candidate.
 * All of this happens under is->kreqq_spinlock.
 */
void
mx_query_peer(mx_instance_state_t *is, int peer)
{
  mcp_kreq_t kreq;
  unsigned long flags;
  int first, candidate, found;

  flags = 0; /* useless initialization to pacify -Wunused on platforms
		where flags are not used */
  candidate = 0;
  found = 0;

  mx_spin_lock_irqsave(&is->kreqq_spinlock, flags);

  if (!is->query_pending) {
    candidate = (peer > mx_biggest_peer) ? 0 : peer;
    first = candidate;
    do {
      if (mx_peer_query_needed(is, candidate)) {
        found = 1;
        break;
      }
      /* advance with wrap-around; back at the start means we are done */
      candidate += 1;
      if (candidate > mx_biggest_peer)
        candidate = 0;
    } while (candidate != first);
  }

  if (found) {
    kreq.query.req.peer_index = htons((uint16_t) candidate);
    kreq.query.req.query_type = MX_MCP_QUERY_HOSTNAME; /* 1 byte, no swapping */
    kreq.query.req.type = MX_MCP_KREQ_QUERY; /* 1 byte, no swapping */

    is->board_ops.write_kreq(is, &kreq);
    is->query_pending = 1;
  }

  mx_spin_unlock_irqrestore(&is->kreqq_spinlock, flags);
}


/*
 * Record the host type of a peer and reset its name accordingly.
 *
 * Nothing happens when the type is unchanged and `force` is zero.
 * Otherwise the type is stored and, unless the peer is flagged
 * MX_PEER_FLAG_LOCAL, its node name is rewritten under mx_peer_sync:
 * cleared for MX hosts (so that it gets queried again) and replaced
 * by a fixed label for every other type.
 */
void
mx_update_peer_type(uint32_t type, uint32_t peer_index, int force)
{
  const char *label;

  if (force == 0 && type == mx_peer_table[peer_index].type)
    return;

  mx_peer_table[peer_index].type = type;

  if (mx_peer_table[peer_index].flags & MX_PEER_FLAG_LOCAL)
    return;

  /* NULL label means "erase the name" */
  switch (type) {
  case MX_MX_HOST_TYPE:
    label = NULL;
    break;
  case MX_GM_HOST_TYPE:
    label = "GM node";
    break;
  case MX_XM_HOST_TYPE:
    label = "XM node";
    break;
  default:
    label = "non-MX node";
    break;
  }

  mx_mutex_enter(&mx_peer_sync);
  if (label == NULL)
    bzero(mx_peer_table[peer_index].node_name,
          sizeof(mx_peer_table[peer_index].node_name));
  else
    strcpy(mx_peer_table[peer_index].node_name, label);
  mx_mutex_exit(&mx_peer_sync);
}


/*
 * Bring a newly started board up to date with the peer table.
 *
 *  1. Every peer already present in mx_peer_table is pushed to the
 *     board with MX_MCP_CMD_ADD_PEER.  Empty slots are skipped rather
 *     than ending the scan, because mx_peer_remove() can leave a hole
 *     below peers that are still valid.
 *  2. The board's own MAC address is added to the peer table, flagged
 *     MX_PEER_FLAG_LOCAL, and its peer index is handed to the MCP.
 *  3. The board's hostname is set and the local peer is typed as an
 *     MX host.
 *
 * Failures are reported with MX_WARN; if the local peer cannot be
 * registered the function returns early instead of dereferencing a
 * NULL hash entry.
 */
void
mx_add_peers(mx_instance_state_t *is)
{
  int i, status, hash_index, local_peer_index;
  uint32_t dummy, mac_high16, mac_low32;
  mx_sync_t tmp_sync;
  mx_peer_hash_t *hash;

  hash_index = 0; /* mx_add_peer() takes a pointer to it; keep it defined */

  mx_sync_init(&tmp_sync, is, 0, "add peers sync");
  for (i = MX_MIN_PEER; i < mx_max_nodes; i++) {
    if (mx_peer_table[i].mac_low32 == 0 &&
        mx_peer_table[i].mac_high16 == 0)
      continue; /* unused or removed slot */
    status = mx_lanai_command(is, MX_MCP_CMD_ADD_PEER, i,
                              mx_peer_table[i].mac_high16,
                              mx_peer_table[i].mac_low32,
                              &dummy, &tmp_sync);
    if (status) {
      MX_WARN(("Board %d: Failed to set LATE peer, status = %d\n",
               is->id, status));
    }
  }
  mx_sync_destroy(&tmp_sync);

  status = mx_mcpi.get_param(is->id, (volatile uint8_t *)NULL, "mac_high16",
                             &mac_high16);
  status |= mx_mcpi.get_param(is->id, (volatile uint8_t *)NULL, "mac_low32",
                              &mac_low32);
  if (status) {
    MX_WARN(("mx%d: Can't find my mac to set peer?\n", is->id));
    return;
  }

  /* add ourself to the peer table */
  status = mx_add_peer(&hash_index, (uint16_t)mac_high16, mac_low32);
  if (status) {
    MX_WARN(("mx%d: Failed to add local peer, status = %d\n",
             is->id, status));
  }

  /* find our peer index table */
  hash = mx_peer_lookup((uint16_t)mac_high16, mac_low32);
  if (!hash) {
    MX_WARN(("mx%d: Can't find my peer idx after adding myself?\n", is->id));
    return;
  }
  local_peer_index = hash->index;

  /* mark ourselves local */
  mx_peer_table[local_peer_index].flags |= MX_PEER_FLAG_LOCAL;

  /* Tell the mcp what his peer index is */
  status = mx_mcpi.set_param(is->id, is->lanai.sram, "local_peer_index",
                             local_peer_index);
  if (status)
    MX_WARN(("mx%d: Failed to set local peer index to %d\n", is->id,
             local_peer_index));

  /* set the hostname on this board */
  mx_set_hostname(is, 0);
  mx_update_peer_type(MX_MX_HOST_TYPE, local_peer_index, 0);
}


/*
 * Open a slot for a MAC address in the sorted overflow table.
 *
 * The insertion position is the first slot whose key is larger than
 * the new key (or that is empty); everything from there up to the
 * first empty slot is moved up by one.  The caller must fill in the
 * returned slot -- it may still hold a copy of the entry that was
 * moved out of it.
 *
 * Returns the slot to fill, or NULL when the table has no empty slot
 * left.  mx_peer_overflow_count is only incremented once room has been
 * confirmed, and nothing is ever written past the end of the array.
 */
static mx_peer_hash_t *
mx_add_peer_hash_overflow(uint16_t mac_high16, uint32_t mac_low32)
{
  uint64_t key, tmp;
  int index, i;

  key = ((uint64_t)mac_high16 << 32) | mac_low32;

  /* find the place to insert */
  for (index = 0; index < mx_max_hash_nodes; index++) {
    tmp = ((uint64_t)mx_peer_overflow[index].mac_high16 << 32)
      | mx_peer_overflow[index].mac_low32;
    if (key < tmp || tmp == 0)
      break;
  }

  /* find the end of the array */
  for (i = index; i < mx_max_hash_nodes; i++) {
    if (mx_peer_overflow[i].mac_low32 == 0 &&
        mx_peer_overflow[i].mac_high16 == 0)
      break;
  }

  /* we should never run out of space; if we do anyway, refuse the
     insertion instead of shuffling entries past the end of the table */
  mx_assert(i < mx_max_hash_nodes);
  if (i >= mx_max_hash_nodes)
    return NULL;

  mx_peer_overflow_count++;

  /* shuffle elements up to make space */
  for (/*nothing*/; i > index; i--) {
    bcopy(&mx_peer_overflow[i-1], &mx_peer_overflow[i],
          sizeof(mx_peer_overflow[0]));
  }

  return mx_peer_overflow + index;
}

/*
 * Register a MAC address as a peer and announce it to every board.
 *
 * Under mx_peer_sync:
 *   - if the address already has a valid peer index, nothing is done;
 *   - otherwise the first free peer-table slot (>= MX_MIN_PEER) is
 *     chosen and a hash entry is secured for it: an existing entry
 *     with index MX_PEER_INVALID is upgraded, else the free main bin
 *     is used, else a slot is opened in the overflow table.
 * The peer table is only modified once both a table slot and a hash
 * entry are available.
 *
 * After the lock is dropped, MX_MCP_CMD_ADD_PEER is sent asynchronously
 * to every existing instance; a failure there is only logged.
 *
 * hash_index is accepted for interface compatibility; it is neither
 * read nor written.
 *
 * Returns 0 on success or if the peer was already present, ENOSPC if
 * the peer table or the overflow table is full.
 */
int
mx_add_peer(int *hash_index, uint16_t mac_high16, uint32_t mac_low32)
{
  mx_instance_state_t *is;
  mx_peer_hash_t *bin, *hash;
  int i, peer_index, status, ret;
  static int warned;

  (void)hash_index;
  ret = 0;
  bin = NULL;

  /* grab the peer lock to make sure the peer table
     does not change out from under us */

  mx_mutex_enter(&mx_peer_sync);

  /* redo the lookup with the lock held to make sure another board
     didn't add the peer */

  hash = mx_peer_lookup_bin(&bin, mac_high16, mac_low32);
  if (hash && hash->index != MX_PEER_INVALID)
    goto abort_with_sync;

  /* find space in the peer table.  Scan linearly -- peer table
     insertions are rare.  We can afford to be inefficient */

  for (peer_index = MX_MIN_PEER; peer_index < mx_max_nodes; peer_index++) {
    if (mx_peer_table[peer_index].mac_low32 == 0 &&
        mx_peer_table[peer_index].mac_high16 == 0)
      break;
  }

  if (peer_index >= mx_max_nodes) {
    if (!warned) {
      MX_WARN(("Attempted to add more than the configured %d number of nodes\n"
	       "Consider increasing mx_max_nodes\n", mx_max_nodes));
      warned = 1;
    }
    ret = ENOSPC;
    goto abort_with_sync;
  }

  /* secure a hash entry before touching the peer table */
  if (hash) /* indirect entry upgraded */
    bin = hash;
  else if (!bin)
    bin = mx_add_peer_hash_overflow(mac_high16, mac_low32);
  if (!bin) {
    ret = ENOSPC;
    goto abort_with_sync;
  }

  if (peer_index > mx_biggest_peer)
    mx_biggest_peer = peer_index;

  /* add entry to the peer table at peer_index */
  mx_peer_table[peer_index].mac_low32 = mac_low32;
  mx_peer_table[peer_index].mac_high16 = mac_high16;
  mx_peer_table[peer_index].flags = 0;

  /* add to peer hash */
  bin->mac_low32 = mac_low32;
  bin->mac_high16 = mac_high16;
  bin->index = peer_index;
  bin->gw = MX_PEER_INVALID;

  mx_mutex_exit(&mx_peer_sync);

  /* Tell the lanai about the new peer */
  for (i = 0; i < mx_max_instance; i++) {
    is = mx_instances[i];
    if (!is)
      continue;
    status = mx_async_lanai_command(is, MX_MCP_CMD_ADD_PEER, peer_index,
                                    mac_high16, mac_low32);
    if (status) {
      MX_WARN(("Board %d: Failed to set peer, status = %d\n", is->id, status));
    }
  }
  return 0;

 abort_with_sync:
  mx_mutex_exit(&mx_peer_sync);
  return ret;
}

/*
 * Look up the hash entry for a MAC address, optionally creating it.
 *
 * A first lookup is done without taking the lock.  If it fails and
 * `create` is set -- and the overflow table still has more than
 * mx_max_nodes slots to spare -- the lookup is repeated under
 * mx_peer_sync and, if the address is still unknown, a new entry is
 * created with index and gw set to MX_PEER_INVALID (i.e. known by
 * address only, without a peer-table slot).
 *
 * Returns the entry, or NULL when none exists and none was created.
 */
mx_peer_hash_t *
mx_peer_lookup_eth(uint16_t mac_high16, uint32_t mac_low32, int create)
{
  mx_peer_hash_t *spare, *entry;

  entry = mx_peer_lookup_bin(&spare, mac_high16, mac_low32);
  if (entry != NULL || !create)
    return entry;

  /* keep enough overflow room for real peers */
  if (mx_max_hash_nodes - mx_peer_overflow_count <= mx_max_nodes)
    return entry;

  mx_mutex_enter(&mx_peer_sync);
  entry = mx_peer_lookup_bin(&spare, mac_high16, mac_low32);
  if (entry == NULL) {
    if (spare == NULL)
      spare = mx_add_peer_hash_overflow(mac_high16, mac_low32);
    if (spare != NULL) {
      spare->mac_high16 = mac_high16;
      spare->mac_low32 = mac_low32;
      spare->index = MX_PEER_INVALID;
      spare->gw = MX_PEER_INVALID;
    }
    entry = spare;
  }
  mx_mutex_exit(&mx_peer_sync);

  return entry;
}

/*
 * Store the result of a completed hostname query.
 *
 * is->host_query.buf holds the reply for peer index `peer`.  If the
 * index is within the peer table and the reply is non-empty, the name
 * is copied (truncated to fit) into the peer's node_name and
 * explicitly NUL-terminated -- strncpy() with a length equal to the
 * string length does not write a terminator on its own.
 *
 * The pending-query flag is then cleared and the next query is
 * started from peer + 1.
 */
void
mx_name_peer(mx_instance_state_t *is, int peer)
{
  size_t len;
  unsigned long flags;

  flags = 0; /* useless initialization to pacify -Wunused on platforms
		where flags are not used */

  mx_spin_lock_irqsave(&is->kreqq_spinlock, flags);
  /* make sure strlen never walks off a non-null terminated string */
  is->host_query.buf[MX_VPAGE_SIZE -1] = '\0';
  len = strlen(is->host_query.buf);
  if (peer > mx_biggest_peer) {
    MX_WARN(("mx%d: Can't name peer (%d) which is larger than biggest peer (%d)\n",
	     is->id, peer, mx_biggest_peer));
  } else if (len) {
    len = MIN(len, sizeof(mx_peer_table[peer].node_name) - 1);

    /* update the peer table */
    strncpy(mx_peer_table[peer].node_name, is->host_query.buf, len);
    mx_peer_table[peer].node_name[len] = '\0';

    /* null first byte of reply buffer, so that a timeout doesn't
       result in the last completed query result being duplicated */
    is->host_query.buf[0] = '\0';
  }
  is->query_pending = 0;
  mx_spin_unlock_irqrestore(&is->kreqq_spinlock, flags);
  mx_query_peer(is, peer + 1);
}

/*
 * Set the hostname of a board, both in the MCP and in the peer table.
 *
 * username  explicit name to use verbatim, or NULL/0 to use
 *           mx_default_hostname with ":<board id>" appended.
 *
 * Nothing is done when the chosen name is empty.  The location and
 * size of the MCP hostname buffer are obtained with get_param(); if
 * that fails (or the reported size is zero) the function warns and
 * returns, since writing through an unknown offset would corrupt
 * board memory.  After the MCP buffer is written, MX_HOSTNAME_SET is
 * raised and the same name is stored in this board's peer-table
 * entry, bounded by both the MCP buffer size and the size of
 * node_name.
 */
void
mx_set_hostname(mx_instance_state_t *is, char *username)
{
  char *c, *hostname;
  size_t name_size;
  uint32_t hostname_len, hostname_addr, mac_high16, mac_low32;
  int status, peer_index;
  mx_peer_hash_t *hash;

  if (username)
    hostname = username;
  else
    hostname = mx_default_hostname;

  if (hostname[0] == '\0')
    return;

  /* get size & location of mcp hostname field */
  status = mx_mcpi.get_param(is->id, is->lanai.sram, "hostname_len",
                             &hostname_len);
  status |= mx_mcpi.get_param(is->id, is->lanai.sram, "hostname_addr",
                              &hostname_addr);
  if (status || hostname_len == 0) {
    MX_WARN(("mx%d: Can't determine location of MCP hostname buffer\n",
	     is->id));
    return;
  }

  c = ((char *)is->lanai.sram + hostname_addr);
  c[hostname_len - 1] = '\0';
  if (username) /* assume user knows what he is doing
		   and don't append trailing :INTERFACE_ID */
    snprintf(c, hostname_len - 1, "%s", hostname);
  else
    snprintf(c, hostname_len - 1, "%s:%d", hostname, is->id);
  is->flags |= MX_HOSTNAME_SET;

  /* update the peer table with our own name */

  status = mx_mcpi.get_param(is->id, (volatile uint8_t *)NULL, "mac_high16",
                             &mac_high16);
  status |= mx_mcpi.get_param(is->id, (volatile uint8_t *)NULL, "mac_low32",
                              &mac_low32);
  if (status) {
    MX_WARN(("mx%d: Can't find my mac to set name?\n", is->id));
    return;
  }

  hash = mx_peer_lookup((uint16_t)mac_high16, mac_low32);
  /* find index into peer table */
  if (!hash) {
    MX_WARN(("mx%d: Can't find my peer idx to set name?\n", is->id));
    return;
  }
  peer_index = hash->index;
  c = mx_peer_table[peer_index].node_name;

  /* same truncation as on the board, but never beyond node_name */
  name_size = sizeof(mx_peer_table[peer_index].node_name);
  if ((size_t)(hostname_len - 1) < name_size)
    name_size = (size_t)(hostname_len - 1);

  if (username) /* assume user knows what he is doing
		   and don't append trailing :INTERFACE_ID */
    snprintf(c, name_size, "%s", hostname);
  else
    snprintf(c, name_size, "%s:%d", hostname, is->id);
}

/*
 * Reset the name of every known peer and restart hostname queries.
 *
 * Each peer-table entry up to mx_biggest_peer is re-typed with its
 * current type and force set, which rewrites its name as
 * mx_update_peer_type() does for that type.  Then, under
 * mx_global_mutex, querying is restarted from peer 0 on every
 * existing instance.
 */
void
mx_clear_peer_names(void)
{
  mx_instance_state_t *board;
  int peer, unit;

  /* clear all the peer names */
  for (peer = 0; peer <= mx_biggest_peer; peer++)
    mx_update_peer_type(mx_peer_table[peer].type, peer, 1);

  /* now start the queries going again on all nics */
  mx_mutex_enter(&mx_global_mutex);
  for (unit = 0; unit < mx_max_instance; unit++) {
    board = mx_instances[unit];
    if (board != NULL)
      mx_query_peer(board, 0);
  }
  mx_mutex_exit(&mx_global_mutex);
}
